library(dplyr)
library(survival)
library(mice)
library(MatchThem)
library(survey)
library(here)
library(gtsummary)
library(parallelly)
library(ranger)
library(furrr)
source(here("functions", "source_encore.io_functions.R"))
# track time
runtime <- tictoc::tic()3 Application in Cox PH models
Comparison of coxph versus svycoxph after multiple imputation and propensity score matching
This is a reproducible example on how to use coxph and svycoxph in combination with multiple imputation and propensity score matching using a mimids object from the MatchThem package.
Load packages:
3.1 Data generation
We use the simulate_flaura() function to simulate a realistic oncology comparative effectiveness cohort analytic dataset.
# load example dataset with missing observations
data_miss <- simulate_flaura(
n_total = 3500,
treat_prevalence = .51,
seed = 42,
include_id = FALSE,
imposeNA = TRUE
)
covariates <- data_miss |>
select(starts_with("c_"), starts_with("dem_")) |>
colnames()
data_miss |>
tbl_summary(
by = "treat",
include = covariates
)Characteristic |
0 |
1 |
|---|---|---|
| c_smoking_history | 579 (51%) | 520 (43%) |
| Unknown | 578 | 584 |
| c_number_met_sites | ||
| 1 | 837 (74%) | 899 (75%) |
| 2 | 249 (22%) | 255 (21%) |
| 3 | 41 (3.6%) | 42 (3.5%) |
| 4 | 7 (0.6%) | 8 (0.7%) |
| Unknown | 578 | 584 |
| c_ecog_cont | 714 (63%) | 637 (53%) |
| Unknown | 578 | 584 |
| c_stage_initial_dx_cont | ||
| 1 | 101 (8.9%) | 0 (0%) |
| 2 | 17 (1.5%) | 12 (1.0%) |
| 3 | 15 (1.3%) | 38 (3.2%) |
| 4 | 1,001 (88%) | 1,154 (96%) |
| Unknown | 578 | 584 |
| c_hemoglobin_g_dl_cont | 12.97 (12.16, 13.72) | 12.89 (11.98, 13.75) |
| Unknown | 578 | 584 |
| c_urea_nitrogen_mg_dl_cont | 2.76 (2.42, 3.08) | 2.77 (2.58, 2.97) |
| Unknown | 578 | 584 |
| c_platelets_10_9_l_cont | 255 (218, 290) | 266 (230, 302) |
| Unknown | 578 | 584 |
| c_calcium_mg_dl_cont | 2.23 (2.21, 2.25) | 2.24 (2.21, 2.27) |
| Unknown | 578 | 584 |
| c_glucose_mg_dl_cont | 4.64 (4.55, 4.72) | 4.65 (4.58, 4.73) |
| Unknown | 578 | 584 |
| c_lymphocyte_leukocyte_ratio_cont | 2.94 (2.81, 3.08) | 2.93 (2.82, 3.04) |
| Unknown | 578 | 584 |
| c_alp_u_l_cont | 4.47 (4.33, 4.61) | 4.51 (4.42, 4.59) |
| Unknown | 578 | 584 |
| c_protein_g_l_cont | 67.8 (65.1, 70.6) | 69.0 (66.0, 72.1) |
| Unknown | 578 | 584 |
| c_alt_u_l_cont | 2.93 (2.72, 3.14) | 2.86 (2.64, 3.10) |
| Unknown | 578 | 584 |
| c_albumin_g_l_cont | 39.01 (36.94, 41.01) | 39.98 (37.76, 42.07) |
| Unknown | 578 | 584 |
| c_bilirubin_mg_dl_cont | -0.71 (-1.45, 0.07) | -0.85 (-1.74, -0.03) |
| Unknown | 578 | 584 |
| c_chloride_mmol_l_cont | 102.08 (100.08, 104.20) | 102.05 (100.20, 104.12) |
| Unknown | 578 | 584 |
| c_monocytes_10_9_l_cont | -0.53 (-0.78, -0.24) | -0.51 (-0.68, -0.35) |
| Unknown | 578 | 584 |
| c_eosinophils_leukocytes_ratio_cont | 0.72 (0.50, 0.97) | 0.69 (0.28, 1.07) |
| Unknown | 578 | 584 |
| c_ldh_u_l_cont | 1.68 (1.65, 1.71) | 1.69 (1.65, 1.72) |
| Unknown | 578 | 584 |
| c_hr_cont | 4.41 (4.39, 4.43) | 4.43 (4.40, 4.46) |
| Unknown | 578 | 584 |
| c_sbp_cont | 4.85 (4.77, 4.92) | 4.85 (4.79, 4.92) |
| Unknown | 578 | 584 |
| c_oxygen_cont | 97.000 (96.986, 97.014) | 97.001 (96.993, 97.007) |
| Unknown | 578 | 584 |
| c_neutrophil_lymphocyte_ratio_cont | 1.33 (1.11, 1.56) | 1.28 (1.02, 1.52) |
| Unknown | 578 | 584 |
| c_bmi_cont | 3.23 (3.14, 3.31) | 3.23 (3.14, 3.31) |
| Unknown | 578 | 584 |
| c_ast_alt_ratio_cont | 0.09 (-0.10, 0.30) | 0.12 (-0.07, 0.32) |
| Unknown | 578 | 584 |
| c_time_dx_to_index | 73 (43, 103) | 43 (32, 55) |
| Unknown | 578 | 584 |
| c_de_novo_mets_dx | 774 (68%) | 945 (78%) |
| Unknown | 578 | 584 |
| c_height_cont | 1.64 (1.59, 1.69) | 1.65 (1.59, 1.70) |
| Unknown | 578 | 584 |
| c_weight_cont | 68 (60, 76) | 69 (61, 76) |
| Unknown | 578 | 584 |
| c_dbp_cont | 75 (70, 79) | 76 (72, 80) |
| Unknown | 578 | 584 |
| c_year_index | ||
| <2018 | 87 (5.1%) | 1,703 (95%) |
| 2018+ | 1,625 (95%) | 85 (4.8%) |
| dem_age_index_cont | 70 (63, 76) | 69 (64, 74) |
| dem_sex_cont | 614 (36%) | 569 (32%) |
| dem_race | 664 (59%) | 753 (63%) |
| Unknown | 578 | 584 |
| dem_region | ||
| Midwest | 123 (11%) | 137 (11%) |
| Northeast | 382 (34%) | 390 (32%) |
| South | 409 (36%) | 456 (38%) |
| West | 220 (19%) | 221 (18%) |
| Unknown | 578 | 584 |
| dem_ses | ||
| 1 | 102 (9.0%) | 217 (18%) |
| 2 | 152 (13%) | 157 (13%) |
| 3 | 302 (27%) | 210 (17%) |
| 4 | 271 (24%) | 291 (24%) |
| 5 | 307 (27%) | 329 (27%) |
| Unknown | 578 | 584 |
| 1
n (%); Median (Q1, Q3) |
||
3.2 Multiple imputation
Multiple imputation using mice:
# impute data
data_imp <- futuremice(
parallelseed = 42,
n.core = parallel::detectCores()-1,
data = data_miss,
method = "rf",
m = 10,
print = FALSE
)3.3 Propensity score matching and weighting
Apply propensity score matching and weighting with replacement within in each imputed dataset:
# apply propensity score matching on mids object
ps_form <- as.formula(paste("treat ~", paste(covariates, collapse = " + ")))
# matching
data_mimids <- matchthem(
formula = ps_form,
datasets = data_imp,
approach = 'within',
method = 'nearest',
caliper = 0.01,
ratio = 1,
replace = F
)
# SMR weighting
data_wimids <- weightthem(
formula = ps_form,
datasets = data_imp,
approach = 'within',
method = "glm",
estimand = "ATT"
)
# trim extreme weights
data_wimids <- trim(
x = data_wimids,
at = .95,
lower = TRUE
)3.4 Outcome model comparisons
Next, we compare the marginal treatment effect estimates coming from a Cox proportional hazards model after propensity score matching and weighting as implemented in the coxph() and in the svycoxph() functions.
From the MatchThem documentation:
with()applies the supplied model inexprto the (matched or weighted) multiply imputed datasets, automatically incorporating the (matching) weights when possible. The argument toexprshould be of the formglm(y ~ z, family = quasibinomial), for example, excluding the data or weights argument, which are automatically supplied.Functions from the survey package, such as
svyglm(), are treated a bit differently. Nosvydesignobject needs to be supplied becausewith()automatically constructs and supplies it with the imputed dataset and estimated weights. Whencluster = TRUE(orwith()detects that pairs should be clustered; see theclusterargument above), pair membership is supplied to theidsargument ofsvydesign().After weighting using
weightthem(),glm_weightit()should be used as the modeling function to fit generalized linear models. It correctly produces robust standard errors that account for estimation of the weights, if possible. SeeWeightIt::glm_weightit()for details. Otherwise,svyglm()should be used rather thanglm()in order to correctly compute standard errors.For Cox models,
coxph()will produce approximately correct standard errors when used with weighting, butsvycoxph()will produce more accurate standard errors when matching is used.
We now want to compare treatment effect estimates for treat when computed (a) using coxph (survival package) and (b) svycoxph (survey package). More information on estimating treatment effects after matching is provided in https://kosukeimai.github.io/MatchIt/articles/estimating-effects.html#survival-outcomes
3.4.0.1 coxph
# coxph result
coxph_results <- with(
data = data_mimids,
expr = coxph(formula = Surv(fu_itt_months, death_itt) ~ treat,
weights = weights,
cluster = subclass,
robust = TRUE
)
) |>
pool() |>
tidy(exponentiate = TRUE, conf.int = TRUE) |>
mutate(package = "survival") |>
select(package, term, estimate, std.error, conf.low, conf.high)
coxph_results3.4.0.2 svycoxph
# svycoxph result
svycoxph_results <- with(
data = data_mimids,
expr = svycoxph(formula = Surv(fu_itt_months, death_itt) ~ treat),
cluster = TRUE
) |>
pool() |>
tidy(exponentiate = TRUE, conf.int = TRUE) |>
mutate(package = "survey") |>
select(package, term, estimate, std.error, conf.low, conf.high)
svycoxph_results3.4.0.3 Summary
rbind(coxph_results, svycoxph_results) package term estimate std.error conf.low conf.high
1 survival treat 0.6809175 0.1662638 0.4841641 0.9576269
2 survey treat 0.6809175 0.1665147 0.4853937 0.9552010
3.4.0.4 coxph
# coxph result
coxph_results <- with(
data = data_wimids,
expr = coxph(formula = Surv(fu_itt_months, death_itt) ~ treat,
weights = weights,
robust = TRUE
)
) |>
pool() |>
tidy(exponentiate = TRUE, conf.int = TRUE) |>
mutate(package = "survival") |>
select(package, term, estimate, std.error, conf.low, conf.high)
coxph_results3.4.0.5 svycoxph
# svycoxph result
svycoxph_results <- with(
data = data_wimids,
expr = svycoxph(formula = Surv(fu_itt_months, death_itt) ~ treat),
cluster = TRUE
) |>
pool() |>
tidy(exponentiate = TRUE, conf.int = TRUE) |>
mutate(package = "survey") |>
select(package, term, estimate, std.error, conf.low, conf.high)
svycoxph_results3.4.0.6 Summary
rbind(coxph_results, svycoxph_results) package term estimate std.error conf.low conf.high
1 survival treat 0.7646088 0.07472081 0.6598544 0.8859933
2 survey treat 0.7646088 0.07472930 0.6598863 0.8859505
3.5 Session info
Script runtime: 0.95 minutes.
pander::pander(subset(data.frame(sessioninfo::package_info()), attached==TRUE, c(package, loadedversion)))| package | loadedversion | |
|---|---|---|
| dplyr | dplyr | 1.1.4 |
| furrr | furrr | 0.3.1 |
| future | future | 1.34.0 |
| gtsummary | gtsummary | 2.0.1 |
| here | here | 1.0.1 |
| MatchThem | MatchThem | 1.2.1 |
| Matrix | Matrix | 1.7-0 |
| mice | mice | 3.16.0 |
| parallelly | parallelly | 1.38.0 |
| ranger | ranger | 0.16.0 |
| survey | survey | 4.4-2 |
| survival | survival | 3.5-8 |
pander::pander(sessionInfo())R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
locale: LC_CTYPE=C.UTF-8, LC_NUMERIC=C, LC_TIME=C.UTF-8, LC_COLLATE=C.UTF-8, LC_MONETARY=C.UTF-8, LC_MESSAGES=C.UTF-8, LC_PAPER=C.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=C.UTF-8 and LC_IDENTIFICATION=C
attached base packages: grid, stats, graphics, grDevices, datasets, utils, methods and base
other attached packages: furrr(v.0.3.1), future(v.1.34.0), ranger(v.0.16.0), parallelly(v.1.38.0), gtsummary(v.2.0.1), here(v.1.0.1), survey(v.4.4-2), Matrix(v.1.7-0), MatchThem(v.1.2.1), mice(v.3.16.0), survival(v.3.5-8) and dplyr(v.1.1.4)
loaded via a namespace (and not attached): tidyselect(v.1.2.1), fastmap(v.1.2.0), digest(v.0.6.37), rpart(v.4.1.23), lifecycle(v.1.0.4), magrittr(v.2.0.3), compiler(v.4.4.0), rlang(v.1.1.4), sass(v.0.4.9), tools(v.4.4.0), utf8(v.1.2.4), yaml(v.2.3.10), gt(v.0.11.0), knitr(v.1.48), htmlwidgets(v.1.6.4), xml2(v.1.3.6), withr(v.3.0.1), purrr(v.1.0.2), nnet(v.7.3-19), fansi(v.1.0.6), jomo(v.2.7-6), colorspace(v.2.1-1), ggplot2(v.3.5.1), globals(v.0.16.3), scales(v.1.3.0), iterators(v.1.0.14), MASS(v.7.3-60.2), cli(v.3.6.3), rmarkdown(v.2.28), crayon(v.1.5.3), generics(v.0.1.3), sessioninfo(v.1.2.2), commonmark(v.1.9.1), minqa(v.1.2.8), DBI(v.1.2.3), pander(v.0.6.5), stringr(v.1.5.1), splines(v.4.4.0), assertthat(v.0.2.1), parallel(v.4.4.0), base64enc(v.0.1-3), mitools(v.2.4), vctrs(v.0.6.5), WeightIt(v.1.3.0), boot(v.1.3-30), glmnet(v.4.1-8), jsonlite(v.1.8.8), mitml(v.0.4-5), listenv(v.0.9.1), foreach(v.1.5.2), tidyr(v.1.3.1), glue(v.1.7.0), nloptr(v.2.1.1), pan(v.1.9), chk(v.0.9.2), codetools(v.0.2-20), stringi(v.1.8.4), shape(v.1.4.6.1), gtable(v.0.3.5), lme4(v.1.1-35.5), munsell(v.0.5.1), tibble(v.3.2.1), pillar(v.1.9.0), htmltools(v.0.5.8.1), R6(v.2.5.1), rprojroot(v.2.0.4), evaluate(v.0.24.0), lattice(v.0.22-6), markdown(v.1.13), backports(v.1.5.0), cards(v.0.2.1), tictoc(v.1.2.1), MatchIt(v.4.5.5), broom(v.1.0.6), renv(v.1.0.7), simsurv(v.1.0.0), Rcpp(v.1.0.13), nlme(v.3.1-164), xfun(v.0.47) and pkgconfig(v.2.0.3)
pander::pander(options('repos'))repos:
REPO_NAME https://packagemanager.posit.co/cran/linux/noble/latest